library("tidyverse")
library("data.table")
library("rtracklayer")
library("ggrastr")
library("DESeq2")
library("ggpubr")
library("wigglescout")
library("eulerr")
library("ggplot2")

# export 
result_folder = "../results/wigglescout/"

bws <- list.files("../data/CutNTag_ChIP-Seq/bw/",
                  full.names = TRUE)
# diff signal function
bw_granges_diff_analysis <- function(granges_c1,
                                     granges_c2,
                                     label_c1,
                                     label_c2,
                                     estimate_size_factors = FALSE,
                                     as_granges = FALSE) {
  # Bind first, get numbers after
  names_values <- NULL
  fields <- names(mcols(granges_c1))
  
  if ("name" %in% fields) {
    names_values <- mcols(granges_c1)[["name"]]
    granges_c1 <- granges_c1[, fields[fields != "name"]]
  }
  
  fields <- names(mcols(granges_c2))
  if ("name" %in% fields) {
    granges_c2 <- granges_c2[, fields[fields != "name"]]
  }
  
  cts_df <- cbind(data.frame(granges_c1), mcols(granges_c2))
  
  if (!is.null(names_values)) {
    rownames(cts_df) <- names_values
  }
  
  # Needs to drop non-complete cases and match rows
  complete <- complete.cases(cts_df)
  cts_df <- cts_df[complete, ]
  
  values_df <- cts_df[, 6:ncol(cts_df)] %>% dplyr::select(where(is.numeric))
  cts <- get_nreads_columns(values_df)
  
  condition_labels <- c(rep(label_c1, length(mcols(granges_c1))), rep(label_c2, length(mcols(granges_c2))))
  
  
  coldata <- data.frame(colnames(cts), condition = as.factor(condition_labels))
  print(coldata)
  
  dds <- DESeq2::DESeqDataSetFromMatrix(
    countData = cts,
    colData = coldata,
    design = ~ condition,
    rowRanges = granges_c1[complete, ]
  )
  
  
  if (estimate_size_factors == TRUE) {
    dds <- DESeq2::estimateSizeFactors(dds)
  }
  else {
    # Since files are scaled, we do not want to estimate size factors
    sizeFactors(dds) <- c(rep(1, ncol(cts)))
  }
  
  dds <- DESeq2::estimateDispersions(dds)
  dds <- DESeq2::nbinomWaldTest(dds)
  
  if (as_granges) {
    result <- DESeq2::results(dds, format = "GRanges", alpha = 0.01)
    if (!is.null(names_values)) {
      result$name <- names_values[complete]
    }
    
  }
  else {
    # result <- results(dds, format="DataFrame")
    result <- dds
  }
  
  result
}

get_nreads_columns <- function(df, length_factor = 100) {
  # Convert mean coverages to round integer read numbers
  cts <- as.matrix(df)
  cts <- as.matrix(cts[complete.cases(cts), ])
  cts <- round(cts * length_factor)
  cts
}
ctcf.and.G4.pro <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_with_promoters.sorted-CTCF_G4.bed")
ctcf.not.G4.pro <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_with_promoters.sorted-CTCFonly.bed")
ctcf.and.G4.npr <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_without_promoters.sorted-CTCF_G4.bed")
ctcf.not.G4.npr <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_without_promoters.sorted-CTCFonly.bed")

ctcf.and.G4.pro$class <- "CTCF_and_G4"
ctcf.not.G4.pro$class <- "CTCF_not_G4"
ctcf.and.G4.npr$class <- "CTCF_and_G4"
ctcf.not.G4.npr$class <- "CTCF_not_G4"
ctcf.and.G4.pro$pro <- "Pro"
ctcf.not.G4.pro$pro <- "Pro"
ctcf.and.G4.npr$pro <- "noPro"
ctcf.not.G4.npr$pro <- "noPro"

ctcf.and.G4 <- c(ctcf.and.G4.pro,ctcf.and.G4.npr)
ctcf.not.G4 <- c(ctcf.not.G4.pro,ctcf.not.G4.npr)
ctcf <- c(ctcf.and.G4,ctcf.not.G4)
ctcf <- sortSeqlevels(ctcf)
ctcf <- sort(ctcf)
names(ctcf) <- paste0(ctcf$class," ",ctcf$pro)
peaks_bed <- "../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories.bed"
export.bed(ctcf, "../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories.bed")
# Wulfridge CTCF ChIP-Seq mocks
mocks_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958387_GSM7116277_E14_Mock_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
          "../data/CutNTag_ChIP-Seq/bw/SRR23958386_GSM7116278_E14_Mock_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

# PDS
# Wulfridge CTCF ChIP-Seq
pds_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958385_GSM7116279_E14_PDS_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
                "../data/CutNTag_ChIP-Seq/bw/SRR23958384_GSM7116280_E14_PDS_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

pdc_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958383_GSM7116281_E14_PhenDC3_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
                "../data/CutNTag_ChIP-Seq/bw/SRR23958382_GSM7116282_E14_PhenDC3_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

G4_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/G4_CnT_NT_batch3_R1.unique.bw")

cov.mocks <- bw_loci(mocks_bigwigs,peaks_bed,labels=c("mock_1","mock_2"))
cov.pds <- bw_loci(pds_bigwigs,peaks_bed,labels=c("PDS_1","PDS_2"))
cov.pdc <- bw_loci(pdc_bigwigs,peaks_bed,labels=c("PhenDC3_1","PhenDC3_2"))
cov.G4 <- bw_loci(G4_bigwigs,peaks_bed,labels=c("G4_NT"))
results <- data.frame(
  as.data.frame(cov.mocks),
  as.data.frame(cov.pds)[6:7],
  as.data.frame(cov.pdc)[6:7],
  as.data.frame(cov.G4)[6],
  raw.lfc.pds_1 = log2(cov.pds$PDS_1/cov.mocks$mock_1),
  raw.lfc.pds_2 = log2(cov.pds$PDS_2/cov.mocks$mock_2),
  raw.lfc.pdc_1 = log2(cov.pds$PDS_1/cov.mocks$mock_1),
  raw.lfc.pdc_2 = log2(cov.pdc$PhenDC3_2/cov.mocks$mock_2),
  raw.lfc.pds = log2(rowMeans(as.data.frame(cov.pds)[6:7])/rowMeans(as.data.frame(cov.mocks)[6:7])),
  raw.lfc.pdc = log2(rowMeans(as.data.frame(cov.pdc)[6:7])/rowMeans(as.data.frame(cov.mocks)[6:7])),
  mean.mock = rowMeans(as.data.frame(cov.mocks)[6:7]),
  mean.pds = rowMeans(as.data.frame(cov.pds)[6:7]),
  mean.pdc = rowMeans(as.data.frame(cov.pdc)[6:7])
  )

cov.mocks$name <- NULL
cov.pds$name <- NULL
cov.pdc$name <- NULL

de_pds <- bw_granges_diff_analysis(cov.mocks,cov.pds,"Mock","PDS")
converting counts to integer mode
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
lfc_pds = DESeq2::lfcShrink(de_pds, coef = "condition_PDS_vs_Mock", type = "apeglm")
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
    Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
    sequence count data: removing the noise and preserving large differences.
    Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
de_pdc <- bw_granges_diff_analysis(cov.mocks,cov.pdc,"Mock","PhenDC3")
converting counts to integer mode
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
lfc_pdc = DESeq2::lfcShrink(de_pdc, coef = "condition_PhenDC3_vs_Mock", type = "apeglm")
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
    Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
    sequence count data: removing the noise and preserving large differences.
    Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
results$deseq.lfc.pds <- results(de_pds)$log2FoldChange
results$deseq.lfcs.pds <- lfc_pds$log2FoldChange
results$deseq.padj.pds <- lfc_pds$padj
results$deseq.mean.pds <- log2(lfc_pds$baseMean)
results$deseq.sig.pds <- lfc_pds$pvalue < 0.05

results$deseq.lfc.pdc <- results(de_pdc)$log2FoldChange
results$deseq.lfcs.pdc <- lfc_pdc$log2FoldChange
results$deseq.padj.pdc <- lfc_pdc$padj
results$deseq.mean.pdc <- log2(lfc_pdc$baseMean)
results$deseq.sig.pdc <- lfc_pdc$pvalue < 0.05

results$class <- gsub(" .+","",results$name)
results$pro <- gsub(".+ ","",results$name)
results$class <- factor(results$class,levels=c("CTCF_and_G4","CTCF_not_G4"))

results$log2.mock <- log2(results$mean.mock)
results$log2.pds <- log2(results$mean.pds)
results$log2.pdc <- log2(results$mean.pdc)
results$log2.G4 <- log2(results$G4_NT)

results$deseq.sigup.pds <- results$deseq.sig.pds & results$deseq.lfc.pds > 0
results$deseq.sigup.pdc <- results$deseq.sig.pdc & results$deseq.lfc.pdc > 0
write.table(results,"foldchange_results.txt")
table(results$name)

CTCF_and_G4 noPro   CTCF_and_G4 Pro CTCF_not_G4 noPro   CTCF_not_G4 Pro 
              988              1300             43309              6016 
results <- read.table("foldchange_results.txt")
mypal <-c("cornflowerblue","orange","red2","darkgreen","#505050")
mypal2 <-c("cornflowerblue","cornflowerblue","orange","orange","red2","red2","darkgreen","darkgreen","#505050","#505050")
ggviolin(results,x ="class",y="mean.mock",fill="class",palette=mypal,add="mean_sd") + coord_cartesian(ylim=c(0,10))

ggviolin(results,x ="pro",y="mean.mock",fill="class",palette=mypal,add="mean_sd") + coord_cartesian(ylim=c(0,10))

mdf <- reshape2::melt(dplyr::select(results,c("class","mock_1","mock_2","PDS_1","PDS_2","PhenDC3_1","PhenDC3_2")))
Using class as id variables
ggboxplot(mdf,x ="variable",y="value",fill="class",palette=mypal ) + coord_cartesian(ylim=c(0,10))

mdf <- reshape2::melt(dplyr::select(results,c("class","mock_1","mock_2","PDS_1","PDS_2","PhenDC3_1","PhenDC3_2")))
Using class as id variables
ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal,add="mean_sd") + coord_cartesian(ylim=c(0,10))

mdf <- reshape2::melt(dplyr::select(results,c("class","raw.lfc.pds","raw.lfc.pdc")))
Using class as id variables
ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal, add="mean_sd") + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-3,3))
Warning: Removed 49 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 49 rows containing non-finite outside the scale range (`stat_summary()`).

compare_means(raw.lfc.pds ~ class,data = results)
compare_means(raw.lfc.pdc ~ class,data = results,)
mdf <- reshape2::melt(dplyr::select(results,c("class","raw.lfc.pds_1","raw.lfc.pds_2","raw.lfc.pdc_1","raw.lfc.pdc_2")))
Using class as id variables
ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal, add="mean_sd") + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-4,4))
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_summary()`).

mdf <- reshape2::melt(dplyr::select(results,c("class","raw.lfc.pds","raw.lfc.pdc")))
Using class as id variables
ggboxplot(mdf,x ="variable",y="value",fill="class",palette=mypal) + coord_cartesian(ylim=c(-3,3))
Warning: Removed 49 rows containing non-finite outside the scale range (`stat_boxplot()`).

ggdensity(results,x="raw.lfc.pds",color="class", fill="class",palette=mypal) + geom_vline(xintercept = 0, linetype ="dotted")
Warning: Removed 26 rows containing non-finite outside the scale range (`stat_density()`).

ggdensity(results,x="raw.lfc.pdc",color="class", fill="class",palette=mypal) + geom_vline(xintercept = 0, linetype ="dotted")
Warning: Removed 23 rows containing non-finite outside the scale range (`stat_density()`).

ggscatterhist(results,x ="log2.mock",y="log2.pds",size = 0.2, alpha=0.1,color="deseq.sig.pds",margin.params = list(fill="class",color="black",size=0.2))
Warning: Removed 15 rows containing non-finite outside the scale range (`stat_density()`).
Warning: Removed 12 rows containing non-finite outside the scale range (`stat_density()`).

ggscatterhist(results,x ="log2.mock",y="log2.pdc",size = 0.2, alpha=0.1,color="deseq.sig.pdc",margin.params = list(fill="class",color="black",size=0.2))
Warning: Removed 15 rows containing non-finite outside the scale range (`stat_density()`).
Warning: Removed 8 rows containing non-finite outside the scale range (`stat_density()`).

table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$pro)
       
        noPro   Pro
  FALSE 42094  6938
  TRUE   2200   378
table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$class)
       
        CTCF_and_G4 CTCF_not_G4
  FALSE        2220       46812
  TRUE           68        2510
mdf<-reshape2::melt(table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$class))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()

vl <- list(sig=grep("TRUE",results$deseq.sigup.pds),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)

table(results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),results$class)
       
        CTCF_and_G4 CTCF_not_G4
  FALSE        2251       48545
  TRUE           37         780
mdf<-reshape2::melt(table(results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),results$class))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()

results$uid <- seq(1:nrow(results))
vl <- list(sig=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)

table(results$deseq.sigup.pds,results$deseq.sigup.pdc)
       
        FALSE  TRUE
  FALSE 48576   456
  TRUE   2217   361
mdf<-reshape2::melt(table(results$deseq.sigup.pds,results$deseq.sigup.pdc))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()

vl <- list(sig_PDS=grep("TRUE",results$deseq.sigup.pds),sig_PDC=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)

results$uid <- seq(1:nrow(results))
vl <- list(sig_PDS=grep("TRUE",results$deseq.sigup.pds),sig_PDC=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class))
plot(euler(vl),quantities=T)

results$sig.by.class.pds <- paste0(results$deseq.sigup.pds,"_",results$class)
results$psize <- 0.01
results$psize[results$deseq.sigup.pds] <- 0.5

ggscatter(results[!grepl("NA",results$sig.by.class.pds),],x ="log2.mock",y="log2.pds",size = 0.2, alpha="psize",color = "sig.by.class.pds",palette=c("#505050","#505050","red2","blue"))

ggscatter(results[!grepl("NA",results$sig.by.class.pds),],x ="log2.mock",y="deseq.lfc.pds",size = 0.2, alpha="psize",color = "sig.by.class.pds",palette=c("#505050","#505050","red2","blue"))

ggscatterhist(results[!grepl("NA",results$sig.by.class.pds),],x ="log2.mock",y="log2.pds",size = 0.4, alpha="psize",color = "sig.by.class.pds",margin.params = list(fill="sig.by.class.pds",color="black",size=0.2),palette=c("#505050","#505050","red2","blue"))
Warning: Removed 13 rows containing non-finite outside the scale range (`stat_density()`).
Warning: Removed 10 rows containing non-finite outside the scale range (`stat_density()`).

ggscatter(results,x ="raw.lfc.pds",y="log2.G4",size = 0.2, alpha=0.1,color="class")
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

ggscatter(results[results$pro=="Pro",],x ="log2.mock",y="G4_NT",size = 0.2, alpha=0.1,color="class",mean.point = T, mean.point.size = 4) + coord_cartesian(ylim=c(0,100))

ggscatter(results,x ="log2.mock",y="G4_NT",size = 0.2, alpha=0.1,color="name",mean.point = T, mean.point.size = 4, palette = mypal) + coord_cartesian(ylim=c(0,50))
Warning: Removed 15 rows containing non-finite outside the scale range (`stat_mean()`).

results$G4.quantile <- cut_number(results$G4_NT, n = 5,labels=F)
ggstripchart(results,x="G4.quantile", y="deseq.lfc.pds",color="red2",add=c("violin","mean_sd"),add.params=list(color="black",fill="cornflowerblue"),size=0.2, alpha=results$deseq.sig.pds/4, palette=mypal) + coord_cartesian(ylim=c(-3,3)) + geom_hline(yintercept = 0, linetype="dotted")
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_point()`).

table(results$G4.quantile,results$deseq.sig.pds)
   
    FALSE TRUE
  1  9565  757
  2  9759  561
  3  9794  529
  4  9916  406
  5  9988  335
ggviolin(results,x="G4.quantile", y="deseq.lfc.pds",fill="cornflowerblue",add="mean_sd") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0, linetype="dotted")
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).

ggviolin(results,x="G4.quantile", y="log2.G4",fill="cornflowerblue",add="mean_sd") + geom_hline(yintercept = 0, linetype="dotted")
Warning: Removed 9483 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 9483 rows containing non-finite outside the scale range (`stat_summary()`).

ggscatterhist(results,x ="raw.lfc.pds",y="log2.G4",size = 0.2, alpha=0.1,color="deseq.sigup.pds",margin.params = list(fill="deseq.sigup.pds",color="black",size=0.2))
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 26 rows containing non-finite outside the scale range (`stat_density()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 9483 rows containing non-finite outside the scale range (`stat_density()`).
Warning: Groups with fewer than two data points have been dropped.
Warning in max(ids, na.rm = TRUE) :
  no non-missing arguments to max; returning -Inf

peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile("../data/CutNTag_ChIP-Seq/bw/GSM6634325_E14_Mock_G4.bw",peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 2284 generated ( 0.0527909395585346 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 324 generated ( 0.053919121318023 per locus)

peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile(G4_bigwigs,peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)

plot_bw_profile(mocks_bigwigs[1],peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 2291 generated ( 0.0529527331561308 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 306 generated ( 0.0509236145781328 per locus)

p1 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[1]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
p2 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[2]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
p3 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[3]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 805 generated ( 0.0186062637235641 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 805 generated ( 0.0186062637235641 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 810 generated ( 0.0187218305789899 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 810 generated ( 0.0187218305789899 per locus)
p4 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[4]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
ggarrange(p1,p2,p3,p4,ncol = 4,nrow = 1)

p1 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[1]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
p2 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[2]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
p3 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[3]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 805 generated ( 0.0186062637235641 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 805 generated ( 0.0186062637235641 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 810 generated ( 0.0187218305789899 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 810 generated ( 0.0187218305789899 per locus)
p4 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[4]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
Warning in .summarize_matrix(fg, label) :
  Profile plot: 131 generated ( 0.0218006323847562 per locus)
ggarrange(p1,p2,p3,p4,ncol = 4,nrow = 1)

cen we learn something from the genes with top-most increase in CTCF?

sigup.pds <- results[results$deseq.sigup.pds,]
sigup.pdc <- results[results$deseq.sigup.pdc,]

#export.bed(sigup.pds,"peaks_sigup_pds.bed")
#export.bed(sigup.pds,"peaks_sigup_pds.bed")
---
title: "R Notebook"
output: html_notebook
---


```{r}
library("tidyverse")
library("data.table")
library("rtracklayer")
library("ggrastr")
library("DESeq2")
library("ggpubr")
library("wigglescout")
library("eulerr")
library("ggplot2")

# export 
result_folder = "../results/wigglescout/"

bws <- list.files("../data/CutNTag_ChIP-Seq/bw/",
                  full.names = TRUE)

```

```{r}
# diff signal function
bw_granges_diff_analysis <- function(granges_c1,
                                     granges_c2,
                                     label_c1,
                                     label_c2,
                                     estimate_size_factors = FALSE,
                                     as_granges = FALSE) {
  # Bind first, get numbers after
  names_values <- NULL
  fields <- names(mcols(granges_c1))
  
  if ("name" %in% fields) {
    names_values <- mcols(granges_c1)[["name"]]
    granges_c1 <- granges_c1[, fields[fields != "name"]]
  }
  
  fields <- names(mcols(granges_c2))
  if ("name" %in% fields) {
    granges_c2 <- granges_c2[, fields[fields != "name"]]
  }
  
  cts_df <- cbind(data.frame(granges_c1), mcols(granges_c2))
  
  if (!is.null(names_values)) {
    rownames(cts_df) <- names_values
  }
  
  # Needs to drop non-complete cases and match rows
  complete <- complete.cases(cts_df)
  cts_df <- cts_df[complete, ]
  
  values_df <- cts_df[, 6:ncol(cts_df)] %>% dplyr::select(where(is.numeric))
  cts <- get_nreads_columns(values_df)
  
  condition_labels <- c(rep(label_c1, length(mcols(granges_c1))), rep(label_c2, length(mcols(granges_c2))))
  
  
  coldata <- data.frame(colnames(cts), condition = as.factor(condition_labels))
  print(coldata)
  
  dds <- DESeq2::DESeqDataSetFromMatrix(
    countData = cts,
    colData = coldata,
    design = ~ condition,
    rowRanges = granges_c1[complete, ]
  )
  
  
  if (estimate_size_factors == TRUE) {
    dds <- DESeq2::estimateSizeFactors(dds)
  }
  else {
    # Since files are scaled, we do not want to estimate size factors
    sizeFactors(dds) <- c(rep(1, ncol(cts)))
  }
  
  dds <- DESeq2::estimateDispersions(dds)
  dds <- DESeq2::nbinomWaldTest(dds)
  
  if (as_granges) {
    result <- DESeq2::results(dds, format = "GRanges", alpha = 0.01)
    if (!is.null(names_values)) {
      result$name <- names_values[complete]
    }
    
  }
  else {
    # result <- results(dds, format="DataFrame")
    result <- dds
  }
  
  result
}

get_nreads_columns <- function(df, length_factor = 100) {
  # Convert mean coverages to round integer read numbers
  cts <- as.matrix(df)
  cts <- as.matrix(cts[complete.cases(cts), ])
  cts <- round(cts * length_factor)
  cts
}
```

```{r}
ctcf.and.G4.pro <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_with_promoters.sorted-CTCF_G4.bed")
ctcf.not.G4.pro <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_with_promoters.sorted-CTCFonly.bed")
ctcf.and.G4.npr <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_without_promoters.sorted-CTCF_G4.bed")
ctcf.not.G4.npr <- import("../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories_without_promoters.sorted-CTCFonly.bed")

ctcf.and.G4.pro$class <- "CTCF_and_G4"
ctcf.not.G4.pro$class <- "CTCF_not_G4"
ctcf.and.G4.npr$class <- "CTCF_and_G4"
ctcf.not.G4.npr$class <- "CTCF_not_G4"
ctcf.and.G4.pro$pro <- "Pro"
ctcf.not.G4.pro$pro <- "Pro"
ctcf.and.G4.npr$pro <- "noPro"
ctcf.not.G4.npr$pro <- "noPro"

ctcf.and.G4 <- c(ctcf.and.G4.pro,ctcf.and.G4.npr)
ctcf.not.G4 <- c(ctcf.not.G4.pro,ctcf.not.G4.npr)
ctcf <- c(ctcf.and.G4,ctcf.not.G4)
ctcf <- sortSeqlevels(ctcf)
ctcf <- sort(ctcf)
names(ctcf) <- paste0(ctcf$class," ",ctcf$pro)
peaks_bed <- "../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories.bed"
export.bed(ctcf, "../data/CutNTag_ChIP-Seq/bed/Wulfridge_CTCF_in_6_categories.bed")
```

```{r}
# Wulfridge CTCF ChIP-Seq mocks
mocks_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958387_GSM7116277_E14_Mock_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
          "../data/CutNTag_ChIP-Seq/bw/SRR23958386_GSM7116278_E14_Mock_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

# PDS
# Wulfridge CTCF ChIP-Seq
pds_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958385_GSM7116279_E14_PDS_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
                "../data/CutNTag_ChIP-Seq/bw/SRR23958384_GSM7116280_E14_PDS_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

pdc_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958383_GSM7116281_E14_PhenDC3_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
                "../data/CutNTag_ChIP-Seq/bw/SRR23958382_GSM7116282_E14_PhenDC3_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")

G4_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/G4_CnT_NT_batch3_R1.unique.bw")

cov.mocks <- bw_loci(mocks_bigwigs,peaks_bed,labels=c("mock_1","mock_2"))
cov.pds <- bw_loci(pds_bigwigs,peaks_bed,labels=c("PDS_1","PDS_2"))
cov.pdc <- bw_loci(pdc_bigwigs,peaks_bed,labels=c("PhenDC3_1","PhenDC3_2"))
cov.G4 <- bw_loci(G4_bigwigs,peaks_bed,labels=c("G4_NT"))
```


```{r}
results <- data.frame(
  as.data.frame(cov.mocks),
  as.data.frame(cov.pds)[6:7],
  as.data.frame(cov.pdc)[6:7],
  as.data.frame(cov.G4)[6],
  raw.lfc.pds_1 = log2(cov.pds$PDS_1/cov.mocks$mock_1),
  raw.lfc.pds_2 = log2(cov.pds$PDS_2/cov.mocks$mock_2),
  raw.lfc.pdc_1 = log2(cov.pds$PDS_1/cov.mocks$mock_1),
  raw.lfc.pdc_2 = log2(cov.pdc$PhenDC3_2/cov.mocks$mock_2),
  raw.lfc.pds = log2(rowMeans(as.data.frame(cov.pds)[6:7])/rowMeans(as.data.frame(cov.mocks)[6:7])),
  raw.lfc.pdc = log2(rowMeans(as.data.frame(cov.pdc)[6:7])/rowMeans(as.data.frame(cov.mocks)[6:7])),
  mean.mock = rowMeans(as.data.frame(cov.mocks)[6:7]),
  mean.pds = rowMeans(as.data.frame(cov.pds)[6:7]),
  mean.pdc = rowMeans(as.data.frame(cov.pdc)[6:7])
  )

cov.mocks$name <- NULL
cov.pds$name <- NULL
cov.pdc$name <- NULL

de_pds <- bw_granges_diff_analysis(cov.mocks,cov.pds,"Mock","PDS")
lfc_pds = DESeq2::lfcShrink(de_pds, coef = "condition_PDS_vs_Mock", type = "apeglm")

de_pdc <- bw_granges_diff_analysis(cov.mocks,cov.pdc,"Mock","PhenDC3")
lfc_pdc = DESeq2::lfcShrink(de_pdc, coef = "condition_PhenDC3_vs_Mock", type = "apeglm")

results$deseq.lfc.pds <- results(de_pds)$log2FoldChange
results$deseq.lfcs.pds <- lfc_pds$log2FoldChange
results$deseq.padj.pds <- lfc_pds$padj
results$deseq.mean.pds <- log2(lfc_pds$baseMean)
results$deseq.sig.pds <- lfc_pds$pvalue < 0.05

results$deseq.lfc.pdc <- results(de_pdc)$log2FoldChange
results$deseq.lfcs.pdc <- lfc_pdc$log2FoldChange
results$deseq.padj.pdc <- lfc_pdc$padj
results$deseq.mean.pdc <- log2(lfc_pdc$baseMean)
results$deseq.sig.pdc <- lfc_pdc$pvalue < 0.05

results$class <- gsub(" .+","",results$name)
results$pro <- gsub(".+ ","",results$name)
results$class <- factor(results$class,levels=c("CTCF_and_G4","CTCF_not_G4"))

results$log2.mock <- log2(results$mean.mock)
results$log2.pds <- log2(results$mean.pds)
results$log2.pdc <- log2(results$mean.pdc)
results$log2.G4 <- log2(results$G4_NT)

results$deseq.sigup.pds <- results$deseq.sig.pds & results$deseq.lfc.pds > 0
results$deseq.sigup.pdc <- results$deseq.sig.pdc & results$deseq.lfc.pdc > 0
```

```{r}
write.table(results,"foldchange_results.txt")
table(results$name)
```

```{r}
results <- read.table("foldchange_results.txt")
```

```{r}
mypal <-c("cornflowerblue","orange","red2","darkgreen","#505050")
mypal2 <-c("cornflowerblue","cornflowerblue","orange","orange","red2","red2","darkgreen","darkgreen","#505050","#505050")
```

```{r fig.width=3, fig.height=3}
ggviolin(results,x ="class",y="mean.mock",fill="class",palette=mypal,add="mean_sd") + coord_cartesian(ylim=c(0,10))
```
```{r fig.width=3, fig.height=3}
ggviolin(results,x ="pro",y="mean.mock",fill="class",palette=mypal,add="mean_sd") + coord_cartesian(ylim=c(0,10))
```

```{r fig.width=5, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results,c("class","mock_1","mock_2","PDS_1","PDS_2","PhenDC3_1","PhenDC3_2")))
ggboxplot(mdf,x ="variable",y="value",fill="class",palette=mypal ) + coord_cartesian(ylim=c(0,10))
```

```{r fig.width=5, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results,c("class","mock_1","mock_2","PDS_1","PDS_2","PhenDC3_1","PhenDC3_2")))
ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal,add="mean_sd") + coord_cartesian(ylim=c(0,10))
```
```{r fig.width=3, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results,c("class","raw.lfc.pds","raw.lfc.pdc")))
ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal, add="mean_sd") + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-3,3))
```

```{r}
compare_means(raw.lfc.pds ~ class,data = results)
```

```{r}
compare_means(raw.lfc.pdc ~ class,data = results,)
```

```{r fig.width=5, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results,c("class","raw.lfc.pds_1","raw.lfc.pds_2","raw.lfc.pdc_1","raw.lfc.pdc_2")))
ggviolin(mdf,x ="variable",y="value",fill="class",palette=mypal, add="mean_sd") + geom_hline(yintercept=0,linetype = "dotted") + coord_cartesian(ylim=c(-4,4))
```
```{r fig.width=4, fig.height=3}
mdf <- reshape2::melt(dplyr::select(results,c("class","raw.lfc.pds","raw.lfc.pdc")))
ggboxplot(mdf,x ="variable",y="value",fill="class",palette=mypal) + coord_cartesian(ylim=c(-3,3))
```

```{r fig.width=3, fig.height=3}
ggdensity(results,x="raw.lfc.pds",color="class", fill="class",palette=mypal) + geom_vline(xintercept = 0, linetype ="dotted")
```

```{r fig.width=3, fig.height=3}
ggdensity(results,x="raw.lfc.pdc",color="class", fill="class",palette=mypal) + geom_vline(xintercept = 0, linetype ="dotted")
```


```{r fig.width=4, fig.height=4}
ggscatterhist(results,x ="log2.mock",y="log2.pds",size = 0.2, alpha=0.1,color="deseq.sig.pds",margin.params = list(fill="class",color="black",size=0.2))
```
```{r fig.width=4, fig.height=4}
ggscatterhist(results,x ="log2.mock",y="log2.pdc",size = 0.2, alpha=0.1,color="deseq.sig.pdc",margin.params = list(fill="class",color="black",size=0.2))
```

```{r}
table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$pro)
```

```{r fig.width=3, fig.height=2.5}
table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$class)
mdf<-reshape2::melt(table(results$deseq.sig.pds & (results$deseq.lfc.pds > 0),results$class))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()
```

```{r}
vl <- list(sig=grep("TRUE",results$deseq.sigup.pds),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)
```

```{r fig.width=3, fig.height=2.5}
table(results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),results$class)
mdf<-reshape2::melt(table(results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),results$class))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()
```


```{r}
results$uid <- seq(1:nrow(results))
vl <- list(sig=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)
```
 
```{r fig.width=3, fig.height=2.5}
table(results$deseq.sigup.pds,results$deseq.sigup.pdc)
mdf<-reshape2::melt(table(results$deseq.sigup.pds,results$deseq.sigup.pdc))
ggplot(mdf,aes(Var1, Var2, fill= value)) + 
  geom_tile(show.legend = F) + geom_text(aes(label=value)) +
  scale_fill_gradient(low="white", high="orange") + theme_minimal()
```

```{r}
vl <- list(sig_PDS=grep("TRUE",results$deseq.sigup.pds),sig_PDC=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class),CTCFonly=grep("not",results$class))
plot(euler(vl),quantities=T)
```

```{r}
results$uid <- seq(1:nrow(results))
vl <- list(sig_PDS=grep("TRUE",results$deseq.sigup.pds),sig_PDC=grep("TRUE",results$deseq.sigup.pdc),CTCF_G4=grep("and",results$class))
plot(euler(vl),quantities=T)
```
```{r fig.width=6, fig.height=6}
results$sig.by.class.pds <- paste0(results$deseq.sigup.pds,"_",results$class)
results$psize <- 0.01
results$psize[results$deseq.sigup.pds] <- 0.5

ggscatter(results[!grepl("NA",results$sig.by.class.pds),],x ="log2.mock",y="log2.pds",size = 0.2, alpha="psize",color = "sig.by.class.pds",palette=c("#505050","#505050","red2","blue"))
```
```{r fig.width=6, fig.height=6}
ggscatter(results[!grepl("NA",results$sig.by.class.pds),],x ="log2.mock",y="deseq.lfc.pds",size = 0.2, alpha="psize",color = "sig.by.class.pds",palette=c("#505050","#505050","red2","blue"))
```

```{r fig.width=5, fig.height=5}
ggscatterhist(results[!grepl("NA",results$sig.by.class.pds),],x ="log2.mock",y="log2.pds",size = 0.4, alpha="psize",color = "sig.by.class.pds",margin.params = list(fill="sig.by.class.pds",color="black",size=0.2),palette=c("#505050","#505050","red2","blue"))
```

```{r fig.width=4, fig.height=4}
ggscatter(results,x ="raw.lfc.pds",y="log2.G4",size = 0.2, alpha=0.1,color="class")
```
```{r fig.width=4, fig.height=4}
ggscatter(results[results$pro=="Pro",],x ="log2.mock",y="G4_NT",size = 0.2, alpha=0.1,color="class",mean.point = T, mean.point.size = 4) + coord_cartesian(ylim=c(0,100))
```
```{r fig.width=4, fig.height=4}
ggscatter(results,x ="log2.mock",y="G4_NT",size = 0.2, alpha=0.1,color="name",mean.point = T, mean.point.size = 4, palette = mypal) + coord_cartesian(ylim=c(0,50))
```

```{r fig.width=3, fig.height=3}
results$G4.quantile <- cut_number(results$G4_NT, n = 5,labels=F)
ggstripchart(results,x="G4.quantile", y="deseq.lfc.pds",color="red2",add=c("violin","mean_sd"),add.params=list(color="black",fill="cornflowerblue"),size=0.2, alpha=results$deseq.sig.pds/4, palette=mypal) + coord_cartesian(ylim=c(-3,3)) + geom_hline(yintercept = 0, linetype="dotted")
```
```{r}
table(results$G4.quantile,results$deseq.sig.pds)
```

```{r fig.width=3, fig.height=3}
ggviolin(results,x="G4.quantile", y="deseq.lfc.pds",fill="cornflowerblue",add="mean_sd") + coord_cartesian(ylim=c(-2,2)) + geom_hline(yintercept = 0, linetype="dotted")
```

```{r fig.width=3, fig.height=3}
ggviolin(results,x="G4.quantile", y="log2.G4",fill="cornflowerblue",add="mean_sd") + geom_hline(yintercept = 0, linetype="dotted")
```

```{r fig.width=4, fig.height=4}
ggscatterhist(results,x ="raw.lfc.pds",y="log2.G4",size = 0.2, alpha=0.1,color="deseq.sigup.pds",margin.params = list(fill="deseq.sigup.pds",color="black",size=0.2))
```


```{r fig.width=3, fig.height=3}
peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile("../data/CutNTag_ChIP-Seq/bw/GSM6634325_E14_Mock_G4.bw",peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)
```
```{r fig.width=3, fig.height=3}
peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile(G4_bigwigs,peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)
```

```{r fig.width=3, fig.height=3}
plot_bw_profile(mocks_bigwigs[1],peak_cats,labels=c(peak_cats[[1]][1,]$name,peak_cats[[2]][1,]$name,peak_cats[[3]][1,]$name,peak_cats[[4]][1,]$name),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal)
```




```{r fig.width=12, fig.height=3}
p1 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[1]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
p2 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[2]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
p3 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[3]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)
p4 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[4]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,5,6)],upstream = 1500, downstream=1500)

ggarrange(p1,p2,p3,p4,ncol = 4,nrow = 1)
```

```{r fig.width=12, fig.height=3}
p1 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[1]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
p2 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[2]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
p3 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[3]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)
p4 <- plot_bw_profile(c(mocks_bigwigs,pds_bigwigs),peak_cats[[4]],labels=c("mock1","mock2","trt1","rtr2"),mode="center",show_error = T,verbose=F, remove_top=0.001, colors=mypal2[c(9,10,3,4)],upstream = 1500, downstream=1500)

ggarrange(p1,p2,p3,p4,ncol = 4,nrow = 1)
```
### cen we learn something from the genes with top-most increase in CTCF?
```{r}
sigup.pds <- results[results$deseq.sigup.pds,]
sigup.pdc <- results[results$deseq.sigup.pdc,]

#export.bed(sigup.pds,"peaks_sigup_pds.bed")
#export.bed(sigup.pds,"peaks_sigup_pds.bed")
```